#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

# Input annotation CSV files; each one is scanned for rsIDs by the loop
# that follows this list.
file_paths = [
    "/Users/genesis/Seafile/Work/营养知识图谱/nutrition-database-result/Axiom_CAS-CN1.na36.r1.a1.annot.nutrition.sort.csv",
    "/Users/genesis/Seafile/Work/营养知识图谱/nutrition-database-result/PharmacoScan_96F.na36.r9.a4.annot.nutrition.sort.csv",
    "/Users/genesis/Seafile/Work/营养知识图谱/nutrition-database-result/Axiom_PharmacoFocus.na36.r4.a1.annot.nutrition.sort.csv",
    "/Users/genesis/Seafile/Work/营养知识图谱/nutrition-database-result/Axiom_PMDA.na36.r7.a8.annot.nutrition.sort.csv",
    "/Users/genesis/Seafile/Work/营养知识图谱/nutrition-database-result/Axiom_APMRA.na35.r3.a2.annot.nutrition.sort.csv",
]

# Collect the unique identifiers beginning with "rs" (rsIDs) found in the
# third comma-separated column of every file listed in ``file_paths``.
set_rsid = set()
for file_path in file_paths:
    # ``with`` closes the file even if an error is raised part-way through.
    with open(file_path, "r") as infile:
        for line in infile:
            line = line.rstrip("\n")
            # Skip "#" comment lines and the quoted column-header row.
            # (The original ``not A or B`` test never excluded the header,
            # because ``not`` binds tighter than ``or``.)
            if line.startswith(("#", "\"Probe Set ID\"")):
                continue
            # Only the third field is used, so stop splitting right after it.
            # NOTE(review): plain splitting assumes the first two columns
            # contain no embedded commas -- confirm against the input files.
            fields = line.split(",", 3)
            if len(fields) < 3:
                # Blank or truncated row: there is no third column to read.
                continue
            # Drop the surrounding double quotes; unlike a fixed [1:-1]
            # slice this leaves an unquoted value intact.
            rsid = fields[2].strip("\"")
            if rsid.startswith("rs"):
                set_rsid.add(rsid)

# Write the collected rsIDs to array_list.txt, one per line.
# The IDs are sorted (plain string order) so the output is identical from
# run to run; iterating the set directly yields an arbitrary order.
# ``with`` flushes and closes the file on exit, including on error, so no
# per-line flush() is needed.
with open("/Users/genesis/Seafile/Work/营养知识图谱/nutrition-database-result/array_list.txt", "w") as outfile:
    outfile.writelines(rsid + "\n" for rsid in sorted(set_rsid))
